
* Load the working dataset.
*   use_test_data == 1 : test_data.dta (resolved against the current working
*                        directory), with any existing _merge, ah_type and
*                        ah_location columns removed.
*   otherwise          : estimation_temp.dta, keeping only rows that have a
*                        non-missing pricediff.
* An unconditional load of test_data.dta used to precede this branch. Both arms
* below replace the data in memory with -use ..., clear-, so that first load
* was always thrown away; it has been removed to avoid the wasted disk read.
if `use_test_data'== 1 {
	use test_data.dta, clear
	cap drop _merge	
	cap drop ah_type
	cap drop ah_location
}
else {
	use "C:\Users\awcassidy1\Dropbox\jmp_new/cleaned_data/estimation_temp.dta", clear
	keep if !missing(pricediff)
}


********************************************************************************
*New chart 2 dimensional: each variable separately
********************************************************************************

* Attach the survey summary to every row through a constant merge key.
generate n = 1
merge m:1 n using "C:\Users\awcassidy1\Dropbox\jmp_new/cleaned_data/survey_sum.dta"

* notraddr is 1 where rrec equals 0 and 0 everywhere else (missing rrec included).
capture drop notraddr
generate notraddr = (rrec == 0)

* Shorten the equipment variable names. These renames are not captured, so the
* script stops here if one of the source variables is absent.
foreach pair in "waterheaterfueltype whftype" "furnacefueltype fftype" "waterheatertanktype wh_tanktype" "ductsystem1type duct_type" {
	rename `pair'
}

* Air-handler variables: captured, so a failed rename is silently ignored.
foreach pair in "system1airhandlertype ah_type" "system1locationairhandler ah_location" {
	capture rename `pair'
}

* Give the matching info_mean_* columns the same short suffixes (also captured).
foreach pair in "waterheaterfueltype whftype" "furnacefueltype fftype" "ductsystem1type duct_type" "waterheatertanktype wh_tanktype" {
	local long_name  : word 1 of `pair'
	local short_name : word 2 of `pair'
	capture rename info_mean_`long_name' info_mean_`short_name'
}

* Variable groups used throughout the rest of the script.
local less_observable_vars "atticrvalue ductravg duct_type negleakavg rsavg  eeravg"
local more_observable_vars "ah_location sizeavg progtherm fftype whftype negsysageavg wh_tanktype"
local other_less_observable_vars "ah_type notwinrec"
local other_more_observable_vars "twosystems notraddr"

* Variables that are highlighted as "extracted" on the chart.
local extracted_1 "whftype fftype"
local extracted_2 "ah_location progtherm twosystems "
local extracted_3 "atticrvalue ductravg negleakavg  "

* Reduced sets used by the parsimonious factor / means / sem specifications.
local parsimonious_less_vars " ductravg  atticrvalue negleakavg  "
local parsimonious_more_vars_1 "fftype whftype "
local parsimonious_more_vars_2 "ah_location progtherm twosystems"

* Union of the four observable / less-observable groups.
local allvars "`less_observable_vars' `more_observable_vars' `other_less_observable_vars' `other_more_observable_vars'"

* Keep complete cases only. Without this step the predicted factors regressed
* on the variables will not equal the loadings.
foreach v of varlist `allvars' {
	drop if missing(`v')
}

* Standardize each variable to mean 0 and sd 1. This runs in a second pass so
* the moments are computed on the complete-case sample only.
foreach v of varlist `allvars' {
	summarize `v'
	replace `v' = (`v' - r(mean)) / r(sd)
}

* Three-factor principal-component factoring of every variable, followed by a
* quartimin rotation. Loadings below .3 in absolute value are blanked in the
* printed tables only.
factor `allvars', pcf factors(3) blanks(.3)
rotate, quartimin  blanks(.3)

ereturn list
*predict Loadings1 Loadings2 Loadings3

* Copy the loading matrix into the dataset as Loadings1-Loadings3, one row per
* variable in the order of `allvars'.
* NOTE(review): e(L) is read after -rotate-; the alternative kept below reads
* e(r_L) instead. Confirm which matrix is intended.
matrix Loadings = e(L)
*Depends how loadings stored
*mat Loadings=e(r_L)
svmat Loadings

label variable sizeavg "Size"
label variable negsysageavg "-Age"
label variable wh_tanktype "Tankless/Solar WH"
label variable negleakavg "- % Leakage"

* Row i of load_var / load_lab holds the name / variable label of the i-th
* entry of `allvars', lining up with the rows written by svmat.
generate load_lab = ""
generate load_var = ""
local dumnum = 0
foreach v of local allvars {
	local ++dumnum
	replace load_lab = "`: var label `v''" in `dumnum'
	replace load_var = "`v'" in `dumnum'
}

* Name the three loading columns.
rename (Loadings1 Loadings2 Loadings3) (more_fuel more_other less)

	
	*create it for all variables. NOTE ORDER OF FACTORS CHANGES BECAUSE 
	*OTHERWISE THE CUBE WOULD HAVE DIFFERENT AXES.


* Means specification: each score is the row mean of its parsimonious
* (already standardized) variable set.
if `use_means'==1 & `factor_all'==0 {
	egen More_Other = rowmean(`parsimonious_more_vars_2')
	egen More_Fuel = rowmean(`parsimonious_more_vars_1')
	egen Less = rowmean(`parsimonious_less_vars')
}

* When the estimation data are in memory, the loadings can be taken from an
* external file so that they are the ones calculated from the test data.
* (They are almost exactly the same when calculated from the estimation data.)
*
* This branch uses LOADINGS estimated on the test data but measures their
* correlations with the variables in the estimation data.
if `factor_all'==1 & "`tt'"=="tt_using_est_load" & `use_test_data'==0 {
	capture drop _merge
	capture drop dum
	capture generate dum = 1

	* Constant-key merge: every row receives the saved <variable>_<score> columns.
	merge m:1 dum using "C:/Users/awcassidy1\Dropbox\jmp_new\cleaned_data/load_tt_from_test.dta"

	* Start each score at zero, then accumulate variable * saved loading.
	foreach score in Less More_Fuel More_Other {
		generate `score' = 0
	}

	foreach v of local parsimonious_less_vars {
		replace Less = Less + `v' * `v'_Less
	}
	foreach v of local parsimonious_more_vars_1 {
		replace More_Fuel = More_Fuel + `v' * `v'_More_Fuel
	}
	foreach v of local parsimonious_more_vars_2 {
		replace More_Other = More_Other + `v' * `v'_More_Other
	}
}

* Treelet scores computed on whichever dataset is in memory, using the cut
* points found to be optimal by the other program:
*   use_test_data == 0 : re-estimate the loadings on the estimation data
*                        (any existing score variables are dropped first);
*   use_test_data == 1 : loadings and correlations both come from the test data.
* Both cases run the same two treelet fits, so they share one branch.
if `factor_all'==1 & "`tt'"=="tt_using_test_load" & inlist(`use_test_data', 0, 1) {
	if `use_test_data'==0 {
		capture drop Less More_Fuel More_Other
	}

	* Less-observable block: one component.
	set seed 123456
	tt `less_observable_vars' `other_less_observable_vars',  cut(2) components(1)
	ttpredict Less

	* More-observable block: two components. The seed is reset so this fit does
	* not depend on the random-number state left by the first one.
	set seed 123456
	tt `more_observable_vars' `other_more_observable_vars',  cut(3) components(2)
	ttpredict More_Fuel More_Other
}



	
* Factor-score branches (no treelet, no sem, no simple means). Each branch
* fits a principal-component factor model, applies a quartimin rotation to
* three factors, and stores the three predicted scores under the names listed
* in ordered_predlist. That local is read again by the eigenvalue loop that
* follows these branches.
if `factor_all'==1 & `sem_solution'==0 & `use_means'==0 & "`tt'"=="no_tt"  {
		****************************************************************************
		*factor using all variables
		****************************************************************************
		* No factors() option on -factor- here; only -rotate- is limited to 3.
		factor `allvars', pcf 
		rotate, quartimin factors(3)
		
		* The score names are assigned by position (first, second, third factor).
		* NOTE(review): this order is hard-coded and differs from the order used
		* in the parsimonious branch below - confirm it still matches the
		* rotated solution whenever the variable list changes.
		local ordered_predlist "More_Other Less  More_Fuel"
		predict `ordered_predlist'
		
		*factor `allvars', pcf factors(3)
		*rotate, quartimin
		*predict  Less More_Other More_Fuel
	}

if  `factor_all'==0 & `sem_solution'==0 & `use_means'==0  & "`tt'"=="no_tt"  {

		****************************************************************************
		*create it for just extracted variables.
		****************************************************************************
		* Same procedure, restricted to the parsimonious variable sets
		* (fftype and whftype are written out instead of parsimonious_more_vars_1).
		factor `parsimonious_less_vars'  fftype whftype `parsimonious_more_vars_2', pcf factors(3)
		rotate, quartimin factors(3)

		
		* Positional naming of the three scores for this specification.
		local ordered_predlist "More_Fuel Less More_Other"
		predict `ordered_predlist'
			
		}
		

	* Anchor variable for each score. The loading loop further down regresses
	* the anchor on its score and divides every other slope by that coefficient
	* (the division only takes effect when sem_solution is not 0).
	local anchor_Less="ductravg"
	local anchor_More_Fuel="fftype"
	local anchor_More_Other="ah_location"
	

	* SEM specification: three latent variables measured by the parsimonious
	* sets. The @a constraint forces fftype and whftype to share the same
	* coefficient on More_Fuel. Standardized results are reported (stand).
	if `sem_solution'== 1  & `use_means'==0   & "`tt'"=="no_tt"  {
		****************************************************************************
		*overwrite loadings if you want to instead use dependence on predicted from sem.
		****************************************************************************
		sem (`parsimonious_less_vars' <-Less)(fftype  <-More_Fuel@a) ///
			(whftype  <-More_Fuel@a)(`parsimonious_more_vars_2'<-More_Other) , stand
			* Score names, read again by the eigenvalue loop after this branch.
			local ordered_predlist " More_Fuel More_Other Less"
			ereturn list
			
			* Store the predicted latent variables under their own names.
			predict  `ordered_predlist', latent(  `ordered_predlist') 
	}
	
	
* Eigenvalue-style summary of the three scores.
* For every variable in `allvars' the variable is regressed on the three scores
* jointly; eig_<score> is the sum over variables of the squared coefficient on
* that score (fourth power when sem_solution == 1). eig_sum adds the three.
* The results are only displayed.

* Variables in `allvars' that are not in the extracted lists. A macro-list
* difference compares whole names, so a name that merely contains another name
* can never be partially deleted (which subinstr() on the raw string could do).
local extracted "`extracted_1' `extracted_2' `extracted_3'"
local not_extracted : list allvars - extracted
di "not_extracted=`not_extracted'"

* ordered_predlist is set only by the pcf and sem branches. Fall back to the
* three score names so the summary is also produced for the means and treelet
* specifications instead of being skipped without any message.
if "`ordered_predlist'"=="" {
	local ordered_predlist "More_Fuel More_Other Less"
}

foreach p of local ordered_predlist {
	local eig_`p'=0
}

* The regression does not depend on the score being summed, so it is run once
* per variable and its coefficients feed all three running totals.
foreach v of varlist `allvars' {
	reg `v'  More_Fuel Less More_Other
	foreach p of local ordered_predlist {
		if `sem_solution'==1 {
			local eig_`p'=`eig_`p'' + (_b[`p'])^4
		}
		else {
			local eig_`p'=`eig_`p'' + (_b[`p'])^2
		}
	}
}

local eig_sum=0
foreach p of local ordered_predlist {
	di "var=`p'"
	local eig_sum=`eig_sum'+`eig_`p''
}

di "eig_More_Fuel=`eig_More_Fuel'"
di "eig_Less=`eig_Less'"
* This line used to be labelled eig_Less although it prints eig_More_Other.
di "eig_More_Other=`eig_More_Other'"
di "eig_sum=`eig_sum'"

	* Fill load_More_Other / load_More_Fuel / load_Less: for every score and
	* every variable in `allvars', the row whose load_var equals the variable
	* receives that variable's loading on the score.
	*   sem_solution == 0 : slope from regressing the variable on the score
	*                       (for pcf the loadings equal the correlations between
	*                       factor scores and the measures, so variables that
	*                       were not extracted can be shown as well);
	*   otherwise         : that slope divided by the anchor variable's slope;
	*   get_corr == 1     : overrides both with the plain correlation and
	*                       accumulates the sums of squared correlations.
	gen load_More_Other=.
	gen load_More_Fuel=.
	gen load_Less=.

	local sum_of_squared_corrs_extracted=0
	local sum_of_squared_corrs_all=0
	local ordered_predlist " More_Fuel More_Other Less "

	* Variables that count towards the "extracted" sum. Membership is tested as
	* an exact name match against this list rather than as a regex/substring
	* search, so one name contained inside another cannot produce a false hit.
	local extracted_for_corr "`parsimonious_less_vars'  fftype whftype `parsimonious_more_vars_2'"

	foreach predvar in `ordered_predlist' {
		* The anchor regression depends only on the score, so it is estimated
		* once per score instead of once per variable.
		* (This scaling works for pcf but not for ml.)
		reg `anchor_`predvar'' `predvar'
		local anchor_load=_b[`predvar']

		foreach v of varlist `allvars' {
			reg `v' `predvar'
			local load=_b[`predvar']

			* Log the initial factor loadings of this variable for comparison.
			sum less more_fuel more_other if load_var=="`v'"

			* Only the sem solution is rescaled by the anchor variable.
			if `sem_solution'==0 {
				local scaled_load=`load'
			}
			else {
				local scaled_load=(`load'/`anchor_load')
			}

			if `get_corr'==1 {
				corr `predvar' `v'
				local corr_load=r(rho)
				local scaled_load=`corr_load'
				local sum_of_squared_corrs_all=`sum_of_squared_corrs_all'+ (`scaled_load')^2
				local is_extracted : list v in extracted_for_corr
				if `is_extracted' {
					local sum_of_squared_corrs_extracted=`sum_of_squared_corrs_extracted'+ (`scaled_load')^2
				}
			}

			replace load_`predvar'=`scaled_load' if load_var=="`v'"
			di "scaled_load=`scaled_load'"

			* A loading above 1 means something went wrong. A missing value also
			* lands here, because Stata treats missing as larger than any number.
			* Print the diagnostics and abort with an explicit error code; the
			* previous version aborted by calling -stop-, which is not a Stata
			* command.
			if `scaled_load'>1 {
				di "predvar=`predvar'"
				di "anchor_load=`anchor_load'"
				di "scaled_load=`scaled_load'"
				di "anchor_predvar=`anchor_`predvar''"
				di as error "loading of `v' on `predvar' is above 1 or missing"
				exit 459
			}
		}
	}
	di "sum_of_squared_corrs_all=`sum_of_squared_corrs_all'"
	di "sum_of_squared_corrs_extracted=`sum_of_squared_corrs_extracted'"

	****************************************************************************
	*now get the observability dimension for the graph.
	****************************************************************************
	
	*get list of info variables
	* One info_mean_<var> name per entry of `allvars', in the same order as the
	* rows of load_var.
	local list_of_info_vars=""
	foreach v in  `allvars' {
		local list_of_info_vars="`list_of_info_vars' info_mean_`v'"
		}
	mean `list_of_info_vars'
	ereturn list
	
	* Transpose the row vector of means into a column and write it to the data,
	* so that row i holds the mean for the i-th variable. svmat names the new
	* variable by appending the column number, i.e. info_means1.
	mat info_means=e(b)'
	svmat info_means
	
	*get fraction rather than percent.
	* NOTE(review): "info_means" below presumably resolves to info_means1 through
	* variable-name abbreviation; this would break under -set varabbrev off- or
	* if a variable literally named info_means exists - confirm.
	replace info_means=info_means/100
	
		

	*HACKEY SOLUTION
	* Appends one empty observation to the dataset.
	local new_obs=_N+1
	set obs `new_obs'
	

	
	gen newvar=.



	*make t=nochange
	*Change c if number of vars changes.
	
	* NOTE(review): minval / maxval are not used in the visible part of the
	* script; presumably they are axis limits for the graph call - confirm.
	local minval=-1
	local maxval=1
	
	*put the heat dimension first here and in the middle when you call the graph.
	
	*z could be any of the three.
	*gen z=info_means
	
	
	* Chart coordinates: one axis per score loading.
	gen z=load_More_Other
	gen y=load_More_Fuel
	gen x=load_Less
	
	
	
	
	
* Flags, helper rows and labels for the loadings chart.
local vars_to_graph "`allvars'"
*local vars_to_graph "`extracted_1' `extracted_2' `extracted_3'"

* Suppress the variables we are not supposed to graph: to_graph is 0/1 on the
* rows that carry a variable name in load_var and missing elsewhere.
gen to_graph=0 if !missing(load_var)
foreach v of varlist `vars_to_graph' {
	replace to_graph=1 if load_var=="`v'"
}

* extracted marks the rows whose variable is in one of the extracted lists.
gen extracted=0 if !missing(load_var)
foreach v of varlist `extracted_1' `extracted_2' `extracted_3' {
	replace extracted=1 if load_var=="`v'"
}

* Rows 498-501 are used below as artificial helper points. Make sure they
* exist: on a dataset with fewer than 501 observations the -in 498- ... -in 501-
* qualifiers would otherwise abort with "observation numbers out of range".
if _N < 501 {
	set obs 501
}

* Min and max specific to each axis for readability: row 500 sits at (1,1,1)
* and row 501 at (0,0,0).
foreach axis in y x z {
	replace `axis'=1 in 500
	replace `axis'=0 in 501
}

* All four helper rows are drawn and treated as extracted.
replace to_graph=1 in 498/501
replace extracted=1 in 498/501

* color_adjustment is 1 only on row 498; plotsize_adjustment is 1 only on the
* two corner rows 500 and 501.
gen color_adjustment=0
replace color_adjustment=1 in 498

gen plotsize_adjustment=0
replace plotsize_adjustment=1 in 500/501

* Fixed info_means values for helper rows 498 and 499.
replace info_means=.1 in 498
replace info_means=.8 in 499

* Make labels only for the variables we extracted; every other row is blank.
replace load_lab=""
replace load_lab="-% Leakage" if load_var=="negleakavg"
replace load_lab = "Duct R-val" if load_var=="ductravg"
replace load_lab= "Attic R-val" if load_var=="atticrvalue"
replace load_lab="Gas WH" if load_var=="whftype"
replace load_lab="Gas Furnace" if load_var=="fftype"
replace load_lab="2 Systems" if load_var=="twosystems"
replace load_lab="Prog Therm" if load_var=="progtherm"
replace load_lab="AH in Closet" if load_var=="ah_location"

* Negative loadings distort the perspective: the corner of the cube is then no
* longer at (0,0), which makes it hard to see whether loadings are high in
* relation to one another. Clip them to 0.
foreach var in x y z {
	replace `var'=0 if !missing(`var') & `var'<0
}

* Adjust label positions (3 for every row).
gen position=3 

